sc10x analysis

load dependancies

Baf53cre Ahr-cko vs ctl,
CR infection,
Intestinal RORgt+ Immune Cells

loading 72,000 for all
cellranger calling 34,620 cells, mean reads 11,163

t0 initial

load 10x data

filt.10x <- Read10X(data.dir = "I:/Shared_win/projects/20230927_10x_SZJ/output_demo/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32285 34620
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGAACAGGA-1 AAACCCAAGAATTGCA-1 AAACCCAAGCAGCCCT-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCAAGCCTGAAG-1 AAACCCAAGCGCATCC-1 AAACCCAAGTTGCCCG-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]     8 34620
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
##       AAACCCAAGAACAGGA-1 AAACCCAAGAATTGCA-1 AAACCCAAGCAGCCCT-1
## CKO.1                 10                 15                 12
## CKO.2               1763                 37                 30
## CKO.3                 17                 35                 17
## CKO.4                 10                 23                497
## CTL.1                  3                  8                  4
## CTL.2                  5                 33                 21
## CTL.3                  9                641                 16
## CTL.4                 11                 15                 12
##       AAACCCAAGCCTGAAG-1 AAACCCAAGCGCATCC-1 AAACCCAAGTTGCCCG-1
## CKO.1                574                  7                  5
## CKO.2                 23                 17                 12
## CKO.3                 14                 19                  4
## CKO.4                 14                 14                545
## CTL.1                  3                  5                310
## CTL.2                  4                612                  5
## CTL.3                  8                 13                  6
## CTL.4                  6                  5                  6
rowSums(FB)
##   CKO.1   CKO.2   CKO.3   CKO.4   CTL.1   CTL.2   CTL.3   CTL.4 
## 3828390 7470222 4598138 4351086 3110845 4072731 4035048 3556755
rowSums(FB>0)
## CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2 CTL.3 CTL.4 
## 34523 34592 34593 34584 34307 34564 34567 34511

FB

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "ILC3_FB")

FB.seur
## An object of class Seurat 
## 8 features across 34620 samples within 1 assay 
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "CKO.1" "CKO.2" "CKO.3" "CKO.4" "CTL.1" "CTL.2" "CTL.3" "CTL.4"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
#FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

first define FB colors based on conditions

#scales::show_col(ggsci::pal_igv("default")(49))
#scales::show_col(ggsci::pal_d3("category20b")(20))
#scales::show_col(ggsci::pal_d3("category20c")(20))
#
color.FB <- ggsci::pal_d3("category20c")(20)[c(2,7,12,17,
                                               1,6,11,16
                                               )]


level.FB <- c(rownames(FB.seur))

color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 4)

scales::show_col(color.FB, ncol = 4)

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])

range ‘q’

q0.50 ~ 0.99
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CKO|CTL",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux$quantile, pch=16, ylab="mod-quantile")
#plot.new()

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])

plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])

q0.9900 ~ 0.9999
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CKO|CTL",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
#plot.new()

plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])

plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])

demultiplexing

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for CKO.1 : 85 reads
## Cutoff for CKO.2 : 237 reads
## Cutoff for CKO.3 : 46 reads
## Cutoff for CKO.4 : 121 reads
## Cutoff for CTL.1 : 23 reads
## Cutoff for CTL.2 : 160 reads
## Cutoff for CTL.3 : 86 reads
## Cutoff for CTL.4 : 92 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9365      296    24959
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CKO.1    CKO.2    CKO.3    CKO.4    CTL.1    CTL.2 
##     9365      296     2931     2977     3207     3122     3356     3146 
##    CTL.3    CTL.4 
##     3150     3070
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.992)
## Cutoff for CKO.1 : 89 reads
## Cutoff for CKO.2 : 249 reads
## Cutoff for CKO.3 : 47 reads
## Cutoff for CKO.4 : 126 reads
## Cutoff for CTL.1 : 24 reads
## Cutoff for CTL.2 : 169 reads
## Cutoff for CTL.3 : 90 reads
## Cutoff for CTL.4 : 97 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9317      315    24988
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CKO.1    CKO.2    CKO.3    CKO.4    CTL.1    CTL.2 
##     9317      315     2937     2984     3213     3123     3357     3143 
##    CTL.3    CTL.4 
##     3153     3078
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for CKO.1 : 94 reads
## Cutoff for CKO.2 : 265 reads
## Cutoff for CKO.3 : 49 reads
## Cutoff for CKO.4 : 134 reads
## Cutoff for CTL.1 : 25 reads
## Cutoff for CTL.2 : 181 reads
## Cutoff for CTL.3 : 95 reads
## Cutoff for CTL.4 : 103 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9236      368    25016
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CKO.1    CKO.2    CKO.3    CKO.4    CTL.1    CTL.2 
##     9236      368     2946     2987     3221     3128     3359     3129 
##    CTL.3    CTL.4 
##     3162     3084
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for CKO.1 : 102 reads
## Cutoff for CKO.2 : 287 reads
## Cutoff for CKO.3 : 51 reads
## Cutoff for CKO.4 : 145 reads
## Cutoff for CTL.1 : 27 reads
## Cutoff for CTL.2 : 197 reads
## Cutoff for CTL.3 : 102 reads
## Cutoff for CTL.4 : 112 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9155      459    25006
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CKO.1    CKO.2    CKO.3    CKO.4    CTL.1    CTL.2 
##     9155      459     2944     2988     3229     3134     3344     3106 
##    CTL.3    CTL.4 
##     3166     3095
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for CKO.1 : 115 reads
## Cutoff for CKO.2 : 325 reads
## Cutoff for CKO.3 : 55 reads
## Cutoff for CKO.4 : 163 reads
## Cutoff for CTL.1 : 29 reads
## Cutoff for CTL.2 : 225 reads
## Cutoff for CTL.3 : 115 reads
## Cutoff for CTL.4 : 127 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9014      595    25011
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CKO.1    CKO.2    CKO.3    CKO.4    CTL.1    CTL.2 
##     9014      595     2960     2977     3242     3136     3348     3067 
##    CTL.3    CTL.4 
##     3178     3103
## tags q0.99
#cutoff.FB <- c(85,237,46,121,
#               23,160,86,92)
## custom
# ref
# CKO.1  q0.996  102
# CKO.2  q0.99  237
# CKO.3  q0.998  55
# CKO.4  q0.99  121
# CTL.1  q0.994  25
# CTL.2  q0.99  160
# CTL.3  q0.996  102
# CTL.4  q0.996  112
#
cutoff.FB <- c(102,237,55,121,
               25,160,102,112)

names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2 CTL.3 CTL.4 
##   102   237    55   121    25   160   102   112
data.frame(cutoff.FB=cutoff.FB)
##       cutoff.FB
## CKO.1       102
## CKO.2       237
## CKO.3        55
## CKO.4       121
## CTL.1        25
## CTL.2       160
## CTL.3       102
## CTL.4       112
par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")

plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])

plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")

plot(sort(t(FB)[,1],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])

plot(sort(t(FB)[,6],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}

df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 34620     2
df.FB[1:10,]
##                    HTO_classification.global hash.ID
## AAACCCAAGAACAGGA-1                   Singlet   CKO.2
## AAACCCAAGAATTGCA-1                   Singlet   CTL.3
## AAACCCAAGCAGCCCT-1                   Singlet   CKO.4
## AAACCCAAGCCTGAAG-1                   Singlet   CKO.1
## AAACCCAAGCGCATCC-1                   Singlet   CTL.2
## AAACCCAAGTTGCCCG-1                   Doublet Doublet
## AAACCCACAAGAGTGC-1                   Doublet Doublet
## AAACCCACAAGTATCC-1                   Doublet Doublet
## AAACCCACAGAAGTGC-1                   Singlet   CTL.3
## AAACCCACAGCGCTTG-1                   Singlet   CKO.4
## custom cutoff run this
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
##                  Doublet     9197        0     0     0     0     0     0     0
##                  Negative       0      337     0     0     0     0     0     0
##                  Singlet        0        0  2935  2997  3221  3135  3393  3162
##                          hash.ID
## HTO_classification.global CTL.3 CTL.4
##                  Doublet      0     0
##                  Negative     0     0
##                  Singlet   3159  3084
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                    orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAACAGGA-1    ILC3_FB       1828            8       1828            8
## AAACCCAAGAATTGCA-1    ILC3_FB        807            8        807            8
## AAACCCAAGCAGCCCT-1    ILC3_FB        609            8        609            8
## AAACCCAAGCCTGAAG-1    ILC3_FB        646            8        646            8
##                    HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAACAGGA-1     CKO.2        CTL.4   3.211363              CKO.2
## AAACCCAAGAATTGCA-1     CTL.3        CTL.2   2.459500              CTL.3
## AAACCCAAGCAGCCCT-1     CKO.4        CTL.2   2.304885              CKO.4
## AAACCCAAGCCTGAAG-1     CKO.1        CKO.4   3.040913              CKO.1
##                    HTO_classification.global hash.ID
## AAACCCAAGAACAGGA-1                   Singlet   CKO.2
## AAACCCAAGAATTGCA-1                   Singlet   CTL.3
## AAACCCAAGCAGCCCT-1                   Singlet   CKO.4
## AAACCCAAGCCTGAAG-1                   Singlet   CKO.1
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,28500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1980,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
          cols = color.FB) 

with ridge plots, filtered
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)


#saveRDS(FB.seur.subset, "FB0809.seur.subset.rds")
FB.seur.subset <- readRDS("FB0809.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID.sort', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern

this is not bad ! with comfortable colors/shapes.
(while for sn10x data, might should be careful to open your eyes~)

stat

FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
                                            levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))

VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)

table(FB.seur@meta.data$hash.ID.sort)
## 
##  Doublet Negative    CKO.1    CKO.2    CKO.3    CKO.4    CTL.1    CTL.2 
##     9197      337     2935     2997     3221     3135     3393     3162 
##    CTL.3    CTL.4 
##     3159     3084
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,12800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "ILC3_GEX")
GEX.seur
## An object of class Seurat 
## 16621 features across 34620 samples within 1 assay 
## Active assay: RNA (16621 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##  Doublet Negative    CKO.1    CKO.2    CKO.3    CKO.4    CTL.1    CTL.2 
##     9197      337     2935     2997     3221     3135     3393     3162 
##    CTL.3    CTL.4 
##     3159     3084
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat 
## 16621 features across 25086 samples within 1 assay 
## Active assay: RNA (16621 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset = percent.mt < 7.5 & nFeature_RNA > 500 & nFeature_RNA < 3000 & nCount_RNA < 10000)
GEX.seur
## An object of class Seurat 
## 16621 features across 24449 samples within 1 assay 
## Active assay: RNA (16621 features, 0 variable features)

filtered obj

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,12800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,4500),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+325,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))


GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", cols = color.FB,
    ncol = 2, pt.size = 0.0)  & geom_jitter(alpha=0.012)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

some few cycling should exist.

VariableFeatures

GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 200)
##   [1] "Hist1h1b"      "Il17a"         "Gzma"          "Ccl4"         
##   [5] "Il22"          "Ccl5"          "Ccl1"          "Cxcl2"        
##   [9] "Cxcl10"        "Areg"          "Xcl1"          "Cd74"         
##  [13] "Hist1h2ae"     "Penk"          "Calca"         "Cxcl1"        
##  [17] "Stmn1"         "Ccl20"         "Hspa1a"        "S100g"        
##  [21] "Il13"          "Il4"           "Ube2c"         "Il5"          
##  [25] "Gzmc"          "H2-Aa"         "Pclaf"         "Cxcl3"        
##  [29] "H2-Eb1"        "Tnfrsf4"       "Il17f"         "S100a6"       
##  [33] "Gzmb"          "Ifi27l2a"      "Lgals1"        "Egr1"         
##  [37] "Cxcl9"         "Ifng"          "Hs3st1"        "Il10"         
##  [41] "Ccl3"          "Trdc"          "Stmn2"         "Sostdc1"      
##  [45] "Il2"           "Top2a"         "Mki67"         "Serpine1"     
##  [49] "Pcp4"          "Csf2"          "Tph1"          "H2-Ab1"       
##  [53] "Defa24"        "Hist1h2ap"     "Birc5"         "Plac8"        
##  [57] "Hist1h3c"      "Lgals3"        "Cenpf"         "Try5"         
##  [61] "Hmgb2"         "Hspb1"         "Jun"           "Ctla2a"       
##  [65] "Hist1h2ab"     "Hspa1b"        "Ifitm2"        "S100a4"       
##  [69] "Egr2"          "Serpinb1a"     "Rrm2"          "Ccr7"         
##  [73] "Ly6a"          "Ifit1"         "Cd36"          "Ly6d"         
##  [77] "Ifitm1"        "Ctla4"         "Akap12"        "Actb"         
##  [81] "Crip1"         "Bambi"         "Rrad"          "Tmsb4x"       
##  [85] "Defa21"        "Rsad2"         "Il1b"          "Bcl2a1b"      
##  [89] "Tnfsf8"        "Tubb5"         "Defa22"        "Gzmf"         
##  [93] "Mt1"           "Hist1h1e"      "Gzme"          "Cdca8"        
##  [97] "Nusap1"        "Apoe"          "Tubb2a"        "Tyrobp"       
## [101] "Egr3"          "Igkc"          "Spp1"          "AA467197"     
## [105] "Dusp14"        "Fabp5"         "Vim"           "Gadd45b"      
## [109] "C2cd4b"        "Nkg7"          "Dppa2"         "Klf2"         
## [113] "Ifitm3"        "Hes1"          "Igha"          "Lyz2"         
## [117] "Cd7"           "G0s2"          "Fos"           "Defa30"       
## [121] "Fabp4"         "Rgcc"          "Lyz1"          "Ptgs2"        
## [125] "Cd83"          "Ccnb2"         "Hbegf"         "Hopx"         
## [129] "Il1r2"         "Ms4a4b"        "Isg15"         "Actg2"        
## [133] "Gzmd"          "Jchain"        "Rasl11a"       "Gadd45g"      
## [137] "Iigp1"         "Hba-a1"        "Gata3"         "Cldn5"        
## [141] "Zfp36"         "Ptpn13"        "Gm10827"       "Cks1b"        
## [145] "Dnaja1"        "Klrd1"         "Trbc1"         "Trac"         
## [149] "Ermn"          "Arg1"          "Klra5"         "Atf3"         
## [153] "Hmmr"          "Ccna2"         "Clspn"         "AY761184"     
## [157] "Lgals7"        "Tyms"          "Slpi"          "Hilpda"       
## [161] "Vcam1"         "Il21"          "Il1rl1"        "Muc3"         
## [165] "Spc24"         "Gfod2"         "Tuba1b"        "Izumo1r"      
## [169] "Dnajb1"        "Plek"          "Cd5"           "Odc1"         
## [173] "Bcl2a1a"       "1700061F12Rik" "Cd3g"          "Fbxo5"        
## [177] "Klra1"         "H2afz"         "Cdca3"         "Tpx2"         
## [181] "Dennd5b"       "Acod1"         "Klra7"         "Fst"          
## [185] "Cd163l1"       "Trdv4"         "Kif11"         "Igfbp4"       
## [189] "Il6"           "Cd24a"         "Hist1h1d"      "Serpinb9"     
## [193] "Socs2"         "Gm14851"       "Esco2"         "Ier3"         
## [197] "Cenpe"         "Serpinb6b"     "Chad"          "Cd52"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Fcer1g, Tmem176a, Tmem176b, Gem, Itm2b, Pxdc1, Sdc4, Il22, Car2, Nrgn 
##     Cdkn1a, Zfp36l1, Eprs, Ckb, Prr29, Ccdc184, Klrb1b, Hk2, Klf4, Cwc25 
##     Odc1, Tnfsf11, Rgs1, Atf3, Arrdc4, Fam110a, Gpx1, Ccrl2, Dad1, Rgcc 
## Negative:  Cd3d, Cd3g, Cd2, Gramd3, Ms4a4b, Fyb, Gimap6, Cd28, Cd3e, Ms4a6b 
##     Ifi27l2a, Il21r, Dusp10, Lef1, Klf2, Lat, Pmepa1, Slfn2, Ccr7, Satb1 
##     Cd5, S1pr1, Stk4, Gimap4, Gimap1, Cd27, Coro1a, Cd247, Cd6, Sh2d1a 
## PC_ 2 
## Positive:  Ccr7, Vegfa, Cdc14a, Hmgn3, Cd81, Bst2, S1pr1, Klf2, Igfbp4, Sdc4 
##     Lef1, Cx3cl1, Batf3, Ramp1, Rflnb, Cryaa, Cd74, Bmp2, Irs2, Myof 
##     Rgcc, Nrp1, Ttn, Cited2, Ccr6, Tgm2, Lmna, Bcl2, Ctsl, Sell 
## Negative:  Tmsb4x, Bcl2a1d, Cxcr6, Pfn1, Serpinb9, Lgals1, Capg, Ucp2, Icos, Serpinb6b 
##     Pik3r1, Cd52, Ctsw, Bcl2a1b, Serpinb1a, Actn2, Srgn, Ptprcap, Ltb4r1, S100a6 
##     Maf, Clic1, Slc16a6, S100a11, Ikzf2, Rgs1, Slc7a6os, Arl5c, Gzmb, Vim 
## PC_ 3 
## Positive:  Hs3st1, Calca, Ptpn13, Il5, Gata3, Ccl1, Klrg1, Il17rb, Il13, Il1rl1 
##     Ipmk, Areg, Il4, Tspan13, Slc7a8, Csf2, Deptor, Otulinl, Ly6a, Hes1 
##     Klf5, Ctla2a, Ifitm2, Tnfrsf18, Mras, Ccr4, Spcs3, Gpx4, Rab27b, Dgat2 
## Negative:  Serpinb9, Lef1, Serpinb6b, Fcer1g, Car2, Klrk1, Gimap4, Serpinb1a, Pik3r1, Ms4a4b 
##     Upp1, Ccr7, Tubb2a, Klf2, Rflnb, S1pr1, Rbpms2, Ctsw, Slc7a6os, Gramd3 
##     Ckb, Klrb1b, Ms4a6b, Gatad1, Gzmc, Calm1, Slc16a6, Pcp4, Irf7, Sell 
## PC_ 4 
## Positive:  Hs3st1, Serpinb9, Lmo4, Calca, Serpinb6b, Csf2, Ctsw, Pik3r1, Il5, Il17rb 
##     Ptpn13, Ccl1, Sub1, Gata3, Il13, Socs2, Ikzf2, Basp1, Nkg7, Igsf5 
##     Satb1, Rab4a, Upp1, Klrg1, Klrk1, Il4, Dusp10, Ncr1, Ppfia1, Epas1 
## Negative:  Actb, Pclaf, Tnfrsf4, S100a4, Spc24, Ctla4, Actg1, Ccr6, S100a6, Stmn1 
##     Vim, Lgals1, Bst2, Gpx1, Hmgn3, Ostf1, Odc1, Asf1b, Ramp1, Cd5 
##     Il17a, Ccna2, Cd4, Cdc14a, Racgap1, S100a10, Vegfa, Nmrk1, Tk1, Nebl 
## PC_ 5 
## Positive:  Bcl2a1b, Ccl5, Cd3g, Ccr2, Cd3e, Hopx, Ccr5, Ctla4, Nkg7, Got1 
##     Plac8, Fasl, Fth1, S100a10, Sh2d2a, Cd6, Cd163l1, Actn2, Il10, Cd52 
##     S100a6, Odc1, Tnfrsf4, Cd3d, Cd40lg, S100a11, Il12rb2, Ltb, Gimap7, Ubald2 
## Negative:  Pclaf, Stmn1, Spc24, Ccna2, Asf1b, Kif4, Knl1, Tk1, Cenpm, Smc2 
##     Shcbp1, Racgap1, Tubb5, Spc25, Nrm, Kif15, Mcm3, Cdkn3, Cenph, Tuba1b 
##     Pbk, Aspm, Hmgn2, Esco2, Ska1, Ncapg2, Diaph3, Bub1b, Ptma, Dut
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene)))
## [1] 1224
setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene))[1:300]
##   [1] "Il17a"     "Gzma"      "Ccl4"      "Il22"      "Ccl5"      "Ccl1"     
##   [7] "Cxcl2"     "Cxcl10"    "Areg"      "Xcl1"      "Cd74"      "Penk"     
##  [13] "Calca"     "Cxcl1"     "Stmn1"     "Ccl20"     "S100g"     "Il13"     
##  [19] "Il4"       "Il5"       "Gzmc"      "H2-Aa"     "Pclaf"     "Cxcl3"    
##  [25] "H2-Eb1"    "Tnfrsf4"   "Il17f"     "S100a6"    "Gzmb"      "Ifi27l2a" 
##  [31] "Lgals1"    "Egr1"      "Cxcl9"     "Ifng"      "Hs3st1"    "Il10"     
##  [37] "Ccl3"      "Stmn2"     "Sostdc1"   "Il2"       "Serpine1"  "Pcp4"     
##  [43] "Csf2"      "Tph1"      "H2-Ab1"    "Defa24"    "Plac8"     "Lgals3"   
##  [49] "Try5"      "Ctla2a"    "Ifitm2"    "S100a4"    "Egr2"      "Serpinb1a"
##  [55] "Ccr7"      "Ly6a"      "Ifit1"     "Cd36"      "Ly6d"      "Ifitm1"   
##  [61] "Ctla4"     "Akap12"    "Actb"      "Crip1"     "Bambi"     "Rrad"     
##  [67] "Tmsb4x"    "Defa21"    "Rsad2"     "Il1b"      "Bcl2a1b"   "Tnfsf8"   
##  [73] "Tubb5"     "Defa22"    "Gzmf"      "Mt1"       "Gzme"      "Apoe"     
##  [79] "Tubb2a"    "Tyrobp"    "Egr3"      "Spp1"      "Dusp14"    "Fabp5"    
##  [85] "Vim"       "Gadd45b"   "C2cd4b"    "Nkg7"      "Dppa2"     "Klf2"     
##  [91] "Ifitm3"    "Hes1"      "Lyz2"      "Cd7"       "G0s2"      "Defa30"   
##  [97] "Fabp4"     "Rgcc"      "Lyz1"      "Ptgs2"     "Cd83"      "Hbegf"    
## [103] "Hopx"      "Il1r2"     "Ms4a4b"    "Isg15"     "Actg2"     "Gzmd"     
## [109] "Rasl11a"   "Gadd45g"   "Iigp1"     "Gata3"     "Cldn5"     "Zfp36"    
## [115] "Ptpn13"    "Klrd1"     "Ermn"      "Arg1"      "Klra5"     "Atf3"     
## [121] "Ccna2"     "Lgals7"    "Slpi"      "Hilpda"    "Vcam1"     "Il21"     
## [127] "Il1rl1"    "Muc3"      "Spc24"     "Gfod2"     "Tuba1b"    "Izumo1r"  
## [133] "Plek"      "Cd5"       "Odc1"      "Bcl2a1a"   "Cd3g"      "Fbxo5"    
## [139] "Klra1"     "H2afz"     "Dennd5b"   "Acod1"     "Klra7"     "Fst"      
## [145] "Cd163l1"   "Igfbp4"    "Il6"       "Cd24a"     "Serpinb9"  "Socs2"    
## [151] "Esco2"     "Ier3"      "Serpinb6b" "Chad"      "Cd52"      "Eprs"     
## [157] "Dut"       "Adra2a"    "Nrgn"      "Olfm4"     "Ccr2"      "Akr1c18"  
## [163] "Tent5a"    "Cd3d"      "Arhgef40"  "Abcd2"     "Aldh1a3"   "Gramd3"   
## [169] "Ccnd1"     "Gem"       "Cd2"       "Id2"       "Tnfsf11"   "Nlrp3"    
## [175] "Klra6"     "Lef1"      "Nkx6-2"    "Prss2"     "Smc2"      "Tpm2"     
## [181] "Bcl2a1d"   "Egr4"      "Arl5c"     "Vegfa"     "Cd28"      "Lmo4"     
## [187] "Upp1"      "Gbp2"      "Adm"       "Tnfrsf9"   "Dnase1l3"  "Tnf"      
## [193] "Sptssb"    "Gcg"       "Actn2"     "Ccr3"      "Cd70"      "Pld4"     
## [199] "Basp1"     "Rflnb"     "Ptcra"     "Ptprz1"    "Cebpd"     "Sox4"     
## [205] "Evx1os"    "Palld"     "Cdkn1a"    "Cst8"      "Coro1a"    "Serpine2" 
## [211] "Krt8"      "Irf7"      "Nmrk1"     "Rnd3"      "Rgs1"      "Klre1"    
## [217] "Mcm3"      "Lmna"      "S1pr1"     "Gpr83"     "Bst2"      "Dusp2"    
## [223] "Kcnq1ot1"  "Aspm"      "Lgals2"    "Foxf1"     "Hmgn2"     "Pcdh10"   
## [229] "Zg16"      "Atp1b2"    "Ccr1"      "Anxa2"     "Clu"       "H1f0"     
## [235] "Rapsn"     "Emp3"      "Kif4"      "Timp3"     "Pfn1"      "Sh2d1a"   
## [241] "Mmp10"     "Foxp3"     "Ccr5"      "Klrg1"     "Gla"       "Msx1"     
## [247] "Cd40lg"    "Rgs16"     "S100a10"   "Serpinb2"  "Krt18"     "Plk2"     
## [253] "Ms4a6b"    "Marcksl1"  "Rgs10"     "Npy"       "Hoxa7"     "Csn3"     
## [259] "Ccl7"      "Foxd1"     "H1fx"      "Mdk"       "Lgmn"      "Tnfaip2"  
## [265] "Runx1t1"   "Tk1"       "Dapl1"     "Irf4"      "Tulp3"     "Zfp503"   
## [271] "Asb4"      "Zbtb32"    "Asf1b"     "Lig1"      "Nr4a1"     "Gimap7"   
## [277] "Bmp2"      "Cwc25"     "H2afx"     "Fzd8"      "Pcdh18"    "Peg3"     
## [283] "Hebp1"     "Ccrl2"     "Irf8"      "Ttn"       "Ifit3"     "Pmepa1"   
## [289] "Ly6c2"     "Edn1"      "Il22ra2"   "Klra9"     "Apold1"    "Serpina3f"
## [295] "Tshz2"     "Elavl2"    "Vgf"       "Ccn2"      "Ccl12"     "Cth"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:27
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 10)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 24449
## Number of edges: 333437
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7863
## Number of communities: 39
## Elapsed time: 2 seconds
## 1 singletons identified. 38 final clusters.

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 153)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 10:18:02 UMAP embedding parameters a = 0.9922 b = 1.112
## 10:18:02 Read 24449 rows and found 27 numeric columns
## 10:18:02 Using Annoy for neighbor search, n_neighbors = 20
## 10:18:02 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:18:05 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpIPh7o5\file55d47427bdd
## 10:18:05 Searching Annoy index using 1 thread, search_k = 2000
## 10:18:11 Annoy recall = 100%
## 10:18:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 10:18:13 Initializing from normalized Laplacian + noise (using irlba)
## 10:18:14 Commencing optimization for 200 epochs, with 726526 positive edges
## 10:18:38 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'tsne', group.by = "FB.info", split.by = "FB.info", 
        ncol = 4, cols = color.FB)

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 4, cols = color.FB)

GEX.seur$cnt <- factor(gsub(".1$|.2$|.3$|.4$","",as.character(GEX.seur$FB.info)),
                       levels = c("CTL",
                                  "CKO"))
color.cnt <- color.FB[c(5,1)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

check some markers

markers.mig <- c("Ptprc","Cd3d","Cd3e","Trdc",
                 "Tbx21","Ifng","Gata3","Il5",
                 "Il10","Il4","Calca","Hilpda",
                 "Rorc","Fcer1g","Il22","Il17a",
                 "Cd4","Cd8a")
FeaturePlot(GEX.seur, 
            features = markers.mig,
            ncol = 4)

FeaturePlot(GEX.seur, features = "Rorc")

FeaturePlot(GEX.seur, features = "Il22")

DotPlot(GEX.seur, features = markers.mig, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)

DotPlot(GEX.seur, features = markers.mig, group.by = "FB.info",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)

# well, this list of markers is from result of previous test analysis
markers.sort <- c("Cd3d","Cd3e","Ccr7","Lef1",
                     "S1pr1","Sell","Sox4","Tcrg-C2",
                     "Izumo1r","Cd4","Il10","Hopx",
                     "Foxp3","Maf","Nebl","Il17a",
                     "Top2a","Mki67","Pcna","Mcm6",
                     "Ly6a","Gzma","Cd40lg","Ifng",
                     "Ccl5","Klrd1","Cd7","Itgae",
                     "Ahr",
                     "Igkc","Igha","Iglc1","Jchain",
                     "Trdc","Trdv4","S100a6","Cd163l1",
                     "Pdcd1",
                     "Jun","Fos","Egr1","Hmgn3",
                     "Rorc","Ccr6","Cd74","Cd83",
                     "Cd81","Il17f","Prr29","Car2",
                     "Tmem176a","Tmem176b","Gzmc","Irf8",
                     "Cxcl2","Cxcl3","Gzmb","Ncr1",
                     "Cxcr6","Upp1","Klrb1c","Cd69",
                     "Apoe","Lyz2","H2-Aa","C1qb",
                     "Fcer1g",
                     "Ifit1","Isg20","Stat1","Cxcl10",
                     "Calca","Gata3","Il17rb","Il13",
                     "Il4","Hilpda","Ar","Il1rl1",
                     "Il5","Cd24a","Ccl1","Areg"
                     )
#DotPlot(GEX.seur, features = markers.sort, group.by = "sort_clusters",
DotPlot(GEX.seur, features = markers.sort, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)

DoubletFinder

library(DoubletFinder)
for(i in 1:length(sweep.res.list)){
  if(length(sweep.res.list[[i]]$pANN[is.nan(sweep.res.list[[i]]$pANN)]) !=0){
    if(i!=1){
      sweep.res.list[[i]] <- sweep.res.list[[i-1]]
    }else(
      sweep.res.list[[i]] <- sweep.res.list[[i+1]]
    )
  }
}
sweep.stats <- summarizeSweep(sweep.res.list, GT=FALSE)
bcmvn <- find.pK(sweep.stats)

## NULL
pk_v <- as.numeric(as.character(bcmvn$pK))
pk_good <- pk_v[bcmvn$BCmetric==max(bcmvn$BCmetric)]
# specify expected doublet number     
nExp_poi <- round(0.05*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 8150 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.05"
# specify expected doublet number     
nExp_poi <- round(0.1*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 8150 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.1"
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
  DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")

sort_clusters

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(2,11,    # naive  
                                            32, 29,  # 
                                            24,
                                            18, 13, 27, 
                                            16, 31, 33, 9, 20,  
                                            
                                            23,
                                            
                                            12, 22,5,10,6, 35, 0,  
                                            17,4,1,3,15, 19, 14, 25, 36, 26, 
                                            7, 21, 
                                            
                                            8,28, 30,37,34))
# previous ref markers
markers.sort <- c("Cd3d","Cd3e","Ccr7","Lef1",
                     "S1pr1","Sell","Sox4","Tcrg-C2",
                     "Izumo1r","Cd4","Il10","Hopx",    "Cd8a","Cd8b1",
                     "Foxp3","Maf","Nebl","Il17a",
                     "Top2a","Mki67","Pcna","Mcm6",
                     "Ly6a","Gzma","Cd40lg","Ifng",
                     "Ccl5","Klrd1","Cd7","Itgae",
                     "Ahr",
                     "Igkc","Igha","Iglc1","Jchain",
                     "Trdc","Trdv4","S100a6","Cd163l1",
                     "Pdcd1",
                     "Jun","Fos","Egr1","Hmgn3",
                     "Rorc","Ccr6","Cd74","Cd83",
                     "Cd81","Il17f","Prr29","Car2",
                     "Tmem176a","Tmem176b","Gzmc","Irf8",
                     "Cxcl2","Cxcl3","Gzmb","Ncr1",
                     "Cxcr6","Upp1","Klrb1c","Cd69",
                     "Apoe","Lyz2","H2-Aa","C1qb",
                     "Fcer1g",
                     "Ifit1","Isg20","Stat1","Cxcl10",
                     "Calca","Gata3","Il17rb","Il13",
                     "Il4","Hilpda","Ar","Il1rl1",
                     "Il5","Cd24a","Ccl1","Areg"
                     )
DotPlot(GEX.seur, features = markers.sort, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, group.by = "sort_clusters")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, group.by = "sort_clusters")

VlnPlot(GEX.seur, features = c("percent.rb"), 
        #ncol = 3, 
        ncol = 1,
        cols = color.cnt,
        pt.size = 0.1, group.by = "sort_clusters", split.by = "cnt")

VlnPlot(GEX.seur, features = c("percent.rb"), 
        #ncol = 3, 
        ncol = 1,
        cols = color.cnt,
        pt.size = 0, group.by = "sort_clusters", split.by = "cnt")

table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.05")])
##              DoubletFinder0.05
## sort_clusters Doublet Singlet
##            2        1    1535
##            11       1     886
##            32       8      75
##            29     130       3
##            24      20     255
##            18      31     599
##            13      20     790
##            27      64     162
##            16      19     646
##            31       5      93
##            33      44      23
##            9       13     934
##            20      10     497
##            23      99     196
##            12      29     840
##            22      27     416
##            5       38    1221
##            10      38     863
##            6       12    1151
##            35      50       4
##            0       22    1980
##            17       9     649
##            4       71    1239
##            1       51    1525
##            3       19    1448
##            15      12     695
##            19      23     543
##            14      68     654
##            25      18     243
##            36       1      39
##            26     120     113
##            7       56    1087
##            21      13     434
##            8       66     995
##            28       8     197
##            30       2     125
##            37       1      16
##            34       3      56
table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.1")])
##              DoubletFinder0.1
## sort_clusters Doublet Singlet
##            2        3    1533
##            11      13     874
##            32      20      63
##            29     131       2
##            24      66     209
##            18      77     553
##            13     112     698
##            27     203      23
##            16      52     613
##            31      13      85
##            33      44      23
##            9       32     915
##            20      31     476
##            23     131     164
##            12      83     786
##            22      42     401
##            5       84    1175
##            10      70     831
##            6       22    1141
##            35      51       3
##            0       48    1954
##            17      32     626
##            4      164    1146
##            1      116    1460
##            3       39    1428
##            15      38     669
##            19      55     511
##            14     186     536
##            25      36     225
##            36       3      37
##            26     173      60
##            7      113    1030
##            21      34     413
##            8       91     970
##            28      19     186
##            30       4     123
##            37       5      12
##            34       9      50

markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"

#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.05,
#                                  test.use = "MAST",
#                                  #test.use = "wilcox",
#                                  logfc.threshold = 0.25)
GEX.markers.pre <- read.table("20230927_10x_SZJ.new20240809.sort_markers.csv", header = TRUE, sep = ",")
#GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
GEX.markers.pre$cluster <- factor(as.character(GEX.markers.pre$cluster),
                          levels = levels(GEX.seur$sort_clusters))



markers.pre_t32 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.1) %>%
                   top_n(n = 32, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene


markers.pre_t48 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 48, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t60 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.01 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 60, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t120 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.01 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 120, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
861/60
## [1] 14.35
861/64
## [1] 13.45312
861/65
## [1] 13.24615
pp.t60 <- list()

for(i in 1:14){
pp.t60[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t60[(64*i-63):(64*i)]))  + coord_flip() + 
           theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: NA
pp.t60
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

nnn = 1447
nnn/60
## [1] 24.11667
nnn/64
## [1] 22.60938
nnn/65
## [1] 22.26154
pp.t120 <- list()

for(i in 1:23){
pp.t120[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t120[(64*i-63):(64*i)]))  + coord_flip() + 
           theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: NA
#pp.t120

Cell Composition

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$sort_clusters)[c(3:10),],
                   main = "Cell Count",
                   gaps_row = c(4),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$sort_clusters)[c(3:10),]),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

check

VlnPlot(GEX.seur, features = c("Cd3d","Cd4","Rorc","Il17a",
                               "Il22","Il17f","H2-K1","H2-Q7",
                               "Rps19","Tnfsf11","Ahr","Gzmc"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, group.by = "sort_clusters") + NoLegend()

VlnPlot(GEX.seur, features = c("Gzmk","Gzma",
                               "Tbx21","Nkg7",
                               "Il4","Xcl1",
                               "Klrb1c","Cd160"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, group.by = "sort_clusters") + NoLegend()

preAnno

####
##
#Cycling:  C26 - ILC3.Ncr1
#          C27 - Th17 (most)
#
#Mix:    C29/33/35
#        C23 - Bcell
#        C36 - Myeloid
#
#Tcell
#  Tn(Naive)    C2/11   Sell+Ccr7+
#       C32  Cd4-Trac-,very few Cd8a+,Pdcd1+   next to remove it
#  Izumo1r+Lrig1+,very few Foxp3+   C24 (like a transit to Treg)
#      let it be Treg.0
#  Treg    C18   Foxp3+ (Lzumo1r low) Il10+
#  Th17    C13   Il17a+
#      Cd40lg (hi) Ifng (hi) Fos/Egr1 (hi)   C31
#  Th1   C16  Gzmk+ Gzma+
#
#  innate like (previous  Tin )
#    iNKT   C9   Cd4-Tbx21+Il4+
#    MAIT   C20    Cd4-Cd160+Xcl1+
#
#  Th2-like   C34  Cd3d+Cd4+Foxp3-Gata3+
#  gdT       C7/21   Trdc+Trdv4+Tcrg-C1+
#                    Cd163l1+Blk+Mmp25+
#
#ILC2       C8/28/30      
#      C37   Ifit1+Isg20+Stat1+
#
#ILC3
#  Ccr6+      C12/22/5/10/6
#             C22  Cd74+MHCII+(H2-Aa/H2-Eb1-H2-Ab1)
#             C12  Jun (hi) Fos (hi) Rorc (hi)
#
#  Ccr6-Ncr1-    C0
#
#  Ncr1+      C17/4/1/3/15/19/14/25
#             C14  Jun (hi) Fos (hi) Rorc (hi)
#             C19   Cxcl9+Serpina3f+Irgm1+ Tgtp1/2+
#
##
####
GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)


GEX.seur$preAnno[GEX.seur$preAnno %in% c(2,11)] <- "Tn"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(24,18)] <- "Treg"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(13)] <- "Th17"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(27)] <- "Th.CC"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(16)] <- "Th1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(31)] <- "Th.Egr1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(9)] <- "iNKT"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(20)] <- "MAIT"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(34)] <- "Th2"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(7,21)] <- "gdT"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(8,28,30)] <- "ILC2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(37)] <- "ILC2.Ifit1"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(26)] <- "ILC3.CC"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(17,4,1,3,15,19,14,25)] <- "ILC3.Ncr1"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(0)] <- "ILC3.nn"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(12,22,5,10,6)] <- "ILC3.Ccr6"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(29,33,35,23,36,32)] <- "Mix"

GEX.seur$preAnno <- factor(GEX.seur$preAnno,
                           levels = c("Tn","Treg",
                                      "Th17","Th.CC","Th1","Th.Egr1","Th2",
                                      "iNKT","MAIT","gdT",
                                      "ILC2","ILC2.Ifit1",
                                      "ILC3.CC","ILC3.Ncr1","ILC3.nn","ILC3.Ccr6",
                                      "Mix"))
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = T, label.size = 3.2, group.by = "preAnno")

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$preAnno)[c(3:10),],
                   main = "Cell Count",
                   gaps_row = c(4),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$preAnno)[c(3:10),]),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

clean up

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, group.by = "preAnno")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, group.by = "preAnno")

GEX.pure <- subset(GEX.seur, subset = preAnno != "Mix")
GEX.pure <- subset(GEX.pure, subset = DoubletFinder0.1=="Singlet" | preAnno %in% c("Th.CC","ILC3.CC"))
#GEX.pure <- subset(GEX.pure, subset = percent.mt < 7.5 & nFeature_RNA > 500 & nFeature_RNA < 3000 & nCount_RNA < 10000)
GEX.pure
## An object of class Seurat 
## 16621 features across 22088 samples within 1 assay 
## Active assay: RNA (16621 features, 1224 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.pure, reduction = "umap", label = T, label.size = 3.2, group.by = "preAnno")

VlnPlot(GEX.pure, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, group.by = "preAnno")

VlnPlot(GEX.pure, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, group.by = "preAnno")

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.pure$FB.info,
      clusters=GEX.pure$preAnno)[c(3:10),],
                   main = "Cell Count",
                   gaps_row = c(4),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.pure$FB.info,
                                clusters=GEX.pure$preAnno)[c(3:10),]),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

# previous ref markers
markers.sort <- c("Cd3d","Cd3e","Ccr7","Lef1",
                     "S1pr1","Sell","Sox4","Tcrg-C2",
                     "Izumo1r","Cd4","Il10","Hopx",    "Cd8a","Cd8b1",
                     "Foxp3","Maf","Nebl","Il17a",
                     "Top2a","Mki67","Pcna","Mcm6",
                     "Ly6a","Gzma","Cd40lg","Ifng",
                     "Ccl5","Klrd1","Cd7","Itgae",
                     "Ahr",
                     "Igkc","Igha","Iglc1","Jchain",
                     "Trdc","Trdv4","S100a6","Cd163l1",
                     "Pdcd1",
                     "Jun","Fos","Egr1","Hmgn3",
                     "Rorc","Ccr6","Cd74","Cd83",
                     "Cd81","Il17f","Prr29","Car2",
                     "Tmem176a","Tmem176b","Gzmc","Irf8",
                     "Cxcl2","Cxcl3","Gzmb","Ncr1",
                     "Cxcr6","Upp1","Klrb1c","Cd69",
                     "Apoe","Lyz2","H2-Aa","C1qb",
                     "Fcer1g",
                     "Ifit1","Isg20","Stat1","Cxcl10",
                     "Calca","Gata3","Il17rb","Il13",
                     "Il4","Hilpda","Ar","Il1rl1",
                     "Il5","Cd24a","Ccl1","Areg"
                     )
markers.new <-     c("Cd3d","Cd3e","Cd4","Cd8a",
                     "Klf2","Ccr7","Lef1","S1pr1",
                     "Tubb2a","Rflnb","Igfbp4","Sell",
                     "Itm2a","Itga6","Smc4",#"Sox4","Pdcd1",
                     "Ptprc",#"Fam241a","Slamf6",
                     "Izumo1r","Tbc1d4",
                     "Ctla4","Il10", 
                     "Hopx","Foxp3","Ccr2","Maf",
                     "Capg","Slamf1","Icos",
                     "Il17a","Nebl","S100a11","S100a10",
                     "Top2a","Mki67","Pcna","Mcm6",
                     
                     "Fasl","Gzma","Gzmk","Ccr5",
                     "Il12rb2",
                     "Jun","Fos","Ier2","Egr1",
                     
                     "Ifng", "Tnf",
                     "Tbx21",
                     "Klrb1c","Xcl1","Nkg7","Plac8","Cd160",
                     
                     
                     
                     "Ccl5",
                     "Klrd1","Cd7","Itgae",
                     
                     "Tcrg-C1","Trdv4","Cd163l1","Blk","Mmp25",
                     
                     "Calca","Gata3","Il17rb","Il13",
                     "Csf2","Dgat2",
                     "Il4","Hilpda","Ar","Il1rl1",
                     "Il5",#"Cd24a",
                     "Ccl1","Areg","Ahr",
                     "Cxcl10",
                     "Gbp6","Stat1","Isg15","Ifit1",
                     
                     "Ncr1","Gzmc","Gzmb","Irf8",
                     "Cxcr6","Irf7","Slc16a6","Klrk1",
                     "Tmem176b","Fcer1g","Car2","Ckb",
                     "Cd69","Jaml","Cxcl9",
                     "Il22","Gem","Pxdc1","Cdc14a",
                     "Sdc4","Vegfa","Bst2","Nmrk1",
                     "Hmgn3","Cd81","Cx3cl1",
                     "Cd74","H2-Aa","H2-Eb1","H2-Ab1",
                     
                     
                     "Ccr6","Batf3","Rorc","Maff",
                     "Icam1","Il18r1",
                     
                     "Il17f"
                     
                     )

DotPlot(GEX.pure, features = markers.new, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 9)) + 
  scale_y_discrete(limits=rev)

DotPlot(GEX.seur, features = markers.new, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 9)) + 
  scale_y_discrete(limits=rev)

#saveRDS(GEX.seur, "./20230927_10x_SZJ.new20270809.preAnno.rds")
VlnPlot(GEX.pure, features = c("Cd3d","Cd4","Rorc","Il17a",
                               "Tbx21","Xcl1","Cd160","Cd7",
                               "Klrb1c","Ifng","Mr1","Cd1d1"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, group.by = "preAnno") + NoLegend()

VlnPlot(GEX.pure, features = c("Cd160","Plac8","Klrd1","Itgae",
                               "Tcrg-C1","Trdv4","Cd163l1","Blk",
                               "Gzma","Gzmk","Ccr5","Ccl5"), 
        #ncol = 3, 
        ncol = 2,
        #cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, group.by = "preAnno") + NoLegend()

stat

GEX.pure$rep <- gsub("CKO.|CTL.","rep",as.character(GEX.pure$FB.info))
GEX.pure$rep[1:5]
## AAACCCAAGAACAGGA-1 AAACCCAAGAATTGCA-1 AAACCCAAGCAGCCCT-1 AAACCCAAGCCTGAAG-1 
##             "rep2"             "rep3"             "rep4"             "rep1" 
## AAACCCAAGCGCATCC-1 
##             "rep2"
GEX.pure$preAnno <- factor(as.character(GEX.pure$preAnno),
                           levels = setdiff(levels(GEX.seur$preAnno),"Mix"))
stat_preAnno <- GEX.pure@meta.data[,c("cnt","rep","preAnno")]

stat_preAnno.s <- stat_preAnno %>%
  group_by(cnt,rep,preAnno) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno <- stat_preAnno.s %>%
  ggplot(aes(x = preAnno, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c("skyblue","pink"), name="") +
  labs(title="Cell Composition, preAnno", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=preAnno, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom.preAnno

sig.test

glm.nb - anova.LRT

Sort.N <- c("CTL","CKO")

glm.nb_anova.preAnno <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_preAnno.s_N <- stat_preAnno.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      total.N <- stat_preAnno.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_preAnno.s_N$total <- total.N[paste0(stat_preAnno.s_N$cnt,
                                            "_",
                                            stat_preAnno.s_N$rep),"total"]
      
      glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(stat_preAnno.s_N$preAnno)){
        glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.preAnno_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno)))
rownames(glm.nb_anova.preAnno_df) <- names(glm.nb_anova.preAnno)
colnames(glm.nb_anova.preAnno_df) <- gsub("X","C",colnames(glm.nb_anova.preAnno_df))
glm.nb_anova.preAnno_df
##                 Tn       Treg      Th17     Th.CC       Th1   Th.Egr1
## CTLvsCKO 0.4031199 0.01076891 0.8867657 0.6924516 0.8183861 0.4245076
##                 Th2      iNKT      MAIT        gdT      ILC2 ILC2.Ifit1
## CTLvsCKO 0.03566346 0.9218254 0.4287892 0.01213637 0.6876901  0.9914385
##             ILC3.CC ILC3.Ncr1   ILC3.nn ILC3.Ccr6
## CTLvsCKO 0.01948181 0.6554751 0.5065082 0.9362884
round(glm.nb_anova.preAnno_df,4)
##              Tn   Treg   Th17  Th.CC    Th1 Th.Egr1    Th2   iNKT   MAIT    gdT
## CTLvsCKO 0.4031 0.0108 0.8868 0.6925 0.8184  0.4245 0.0357 0.9218 0.4288 0.0121
##            ILC2 ILC2.Ifit1 ILC3.CC ILC3.Ncr1 ILC3.nn ILC3.Ccr6
## CTLvsCKO 0.6877     0.9914  0.0195    0.6555  0.5065    0.9363

DEG

Idents(GEX.pure) <- "cnt"
list_sort <- list()

for(nn in levels(GEX.pure$preAnno)){
  list_sort[[nn]] <- c(nn)
}


names_sort <- names(list_sort)
data.frame(list_sort)
##   Tn Treg Th17 Th.CC Th1 Th.Egr1 Th2 iNKT MAIT gdT ILC2 ILC2.Ifit1 ILC3.CC
## 1 Tn Treg Th17 Th.CC Th1 Th.Egr1 Th2 iNKT MAIT gdT ILC2 ILC2.Ifit1 ILC3.CC
##   ILC3.Ncr1 ILC3.nn ILC3.Ccr6
## 1 ILC3.Ncr1 ILC3.nn ILC3.Ccr6

run

GEX.DEGs_sort <- list()

for(nn in names_sort){
  GEX.DEGs_sort[[nn]] <- FindAllMarkers(subset(GEX.pure, subset = preAnno %in% list_sort[[nn]]),
                                       assay = "RNA",
                                       only.pos = T,
                                       min.pct = 0.05,
                                       logfc.threshold = 0.1,
                                       test.use = "MAST")
}
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTL
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CKO
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
lapply(GEX.DEGs_sort, dim)
## $Tn
## [1] 86  7
## 
## $Treg
## [1] 77  7
## 
## $Th17
## [1] 51  7
## 
## $Th.CC
## [1] 70  7
## 
## $Th1
## [1] 72  7
## 
## $Th.Egr1
## [1] 43  7
## 
## $Th2
## [1] 47  7
## 
## $iNKT
## [1] 92  7
## 
## $MAIT
## [1] 67  7
## 
## $gdT
## [1] 101   7
## 
## $ILC2
## [1] 82  7
## 
## $ILC2.Ifit1
## [1] 12  7
## 
## $ILC3.CC
## [1] 99  7
## 
## $ILC3.Ncr1
## [1] 192   7
## 
## $ILC3.nn
## [1] 86  7
## 
## $ILC3.Ccr6
## [1] 120   7
#GEX.DEGs_sort
# save DEGs' table
df.DEGs_sort <- data.frame()
for(nn in names_sort){
  df.DEGs_sort <- rbind(df.DEGs_sort, data.frame(GEX.DEGs_sort[[nn]],preAnno=nn))
}

#write.csv(df.DEGs_sort, "20230927_10x_SZJ.new20270809.DEGs.CTLvsCKO.preAnno.csv")

DEG stat

df.DEGs_sort$preAnno <- factor(df.DEGs_sort$preAnno,
                               levels = levels(GEX.pure$preAnno))
cut.padj = 0.05
cut.log2FC = 0.1   
  
cut.pct1 = 0.05

df.DEGs_sort %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(preAnno,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
##      preAnno cluster up.DEGs
## 1         Tn     CKO       4
## 2      Th.CC     CKO       1
## 3        Th1     CTL       1
## 4       iNKT     CKO       1
## 5        gdT     CKO       4
## 6       ILC2     CKO       2
## 7  ILC3.Ncr1     CTL      20
## 8  ILC3.Ncr1     CKO      20
## 9    ILC3.nn     CTL       3
## 10   ILC3.nn     CKO       1
## 11 ILC3.Ccr6     CTL       3
## 12 ILC3.Ccr6     CKO       5
df.DEGs_sort %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(preAnno,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=preAnno, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c("skyblue","pink"), name="") +
  labs(title=paste0("up.DEGs stat, pct.1>0.1, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))

DEG plot

pp.DEGs.sort <- lapply(list_sort,function(x){NA})
GEX.DEGs_sort.combine <- GEX.DEGs_sort
GEX.DEGs_sort.combine <- lapply(GEX.DEGs_sort.combine, function(x){
  x[x$cluster=="CTL","avg_log2FC"] <- -x[x$cluster=="CTL","avg_log2FC"]
  x
})

Tn

pp.DEGs.sort$Tn <- GEX.DEGs_sort.combine$Tn %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="Tn CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$Tn

Th.CC

pp.DEGs.sort$Th.CC <- GEX.DEGs_sort.combine$Th.CC %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="Th.CC CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$Th.CC

Th1

pp.DEGs.sort$Th1 <- GEX.DEGs_sort.combine$Th1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="Th1 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$Th1

iNKT

pp.DEGs.sort$iNKT <- GEX.DEGs_sort.combine$iNKT %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="iNKT CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$iNKT

gdT

pp.DEGs.sort$gdT <- GEX.DEGs_sort.combine$gdT %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="gdT CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$gdT

ILC2

pp.DEGs.sort$ILC2 <- GEX.DEGs_sort.combine$ILC2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="ILC2 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$ILC2

ILC3.Ncr1

pp.DEGs.sort$ILC3.Ncr1 <- GEX.DEGs_sort.combine$ILC3.Ncr1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="ILC3.Ncr1 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$ILC3.Ncr1

ILC3.nn

pp.DEGs.sort$ILC3.nn <- GEX.DEGs_sort.combine$ILC3.nn %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="ILC3.nn CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$ILC3.nn

ILC3.Ccr6

pp.DEGs.sort$ILC3.Ccr6 <- GEX.DEGs_sort.combine$ILC3.Ccr6 %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="ILC3.Ccr6 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$ILC3.Ccr6

Treg

pp.DEGs.sort$Treg <- GEX.DEGs_sort.combine$Treg %>%
  mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"="Skyblue",
                               "CKO"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="Treg CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$Treg

ILCs only

GEX.sub <- subset(GEX.pure, subset = preAnno %in% c("ILC2","ILC2.Ifit1",
                                                    "ILC3.CC","ILC3.Ncr1","ILC3.nn","ILC3.Ccr6"))
GEX.sub
## An object of class Seurat 
## 16621 features across 14413 samples within 1 assay 
## Active assay: RNA (16621 features, 1224 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
GEX.sub$preAnno <- as.character(GEX.sub$preAnno)
GEX.sub$preAnno[GEX.sub$preAnno %in% c("ILC2.Ifit1")] <- "ILC2"

GEX.sub$preAnno <- factor(GEX.sub$preAnno,
                          levels = c("ILC2",
                                     "ILC3.CC","ILC3.Ncr1","ILC3.nn","ILC3.Ccr6"))
DimPlot(GEX.sub, reduction = "umap", label = T, group.by = "preAnno") +
  DimPlot(GEX.sub, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.sub$FB.info,
      clusters=GEX.sub$preAnno)[c(3:10),],
                   main = "Cell Count",
                   gaps_row = c(4),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.sub$FB.info,
                                clusters=GEX.sub$preAnno)[c(3:10),]),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

stat_preAnno <- GEX.sub@meta.data[,c("cnt","rep","preAnno")]

stat_preAnno.s <- stat_preAnno %>%
  group_by(cnt,rep,preAnno) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno <- stat_preAnno.s %>%
  ggplot(aes(x = preAnno, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c("skyblue","pink"), name="") +
  labs(title="Cell Composition, preAnno", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=preAnno, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom.preAnno

sig.test

glm.nb - anova.LRT

Sort.N <- c("CTL","CKO")

glm.nb_anova.preAnno <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_preAnno.s_N <- stat_preAnno.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      total.N <- stat_preAnno.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_preAnno.s_N$total <- total.N[paste0(stat_preAnno.s_N$cnt,
                                            "_",
                                            stat_preAnno.s_N$rep),"total"]
      
      glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(stat_preAnno.s_N$preAnno)){
        glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.preAnno_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno)))
rownames(glm.nb_anova.preAnno_df) <- names(glm.nb_anova.preAnno)
colnames(glm.nb_anova.preAnno_df) <- gsub("X","C",colnames(glm.nb_anova.preAnno_df))
glm.nb_anova.preAnno_df
##               ILC2    ILC3.CC ILC3.Ncr1   ILC3.nn ILC3.Ccr6
## CTLvsCKO 0.5416787 0.01492285 0.4265305 0.2850411 0.9590854
round(glm.nb_anova.preAnno_df,5)
##             ILC2 ILC3.CC ILC3.Ncr1 ILC3.nn ILC3.Ccr6
## CTLvsCKO 0.54168 0.01492   0.42653 0.28504   0.95909